This RMD is the first in the workflow and reads in, cleans, and visualizes the stripchart stream height data for the 2017 - 2021 water years at the S2 bog site of the Marcell Experimental Forest, Northern Research Station.
External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2date column that follows the format
YYYY-MM-DD (and does not include a timestamp) for ease of
sorting/manipulating dates#create file path to call the data from Box
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"
#read in the 2017 data
wy2017 <- read_excel(path = here(filepath, "WY2017.S2_Breakpoint.xlsx"),
skip =3) #skip the top 3 rows of the Excel Sheet
#clean the 2017 data
wy2017_clean <- wy2017 %>%
#change column names
rename(datetime = "Date/time",
stream_height_ft = "Stage.ft") %>%
#remove unnecessary columns
select(c("datetime", "stream_height_ft")) %>%
#format column names into lower_snake_case
clean_names() %>%
#create a date column
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"))
#save clean data CSV to use in future RMD files
write.csv(x = wy2017_clean,
file = file.path(here("intermediate_data", "wy2017_clean.csv")),
row.names = FALSE)
#test the 2017 data
if(nrow(wy2017) != nrow(wy2017_clean)) stop("Check clean dataframe dimensions")
#plot
p_2017 <- ggplot(data = wy2017_clean) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (ft)",
title = "WY 2017 S2 Bog Weir Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#static plot
#p_2017
#interactive plot
ggplotly(p_2017)
External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2December 1-3 clock stopped, no record
December 8-10 pin reached end of chart, no record
Dec 11-end, clock is barely running. No way to tell which days are what on the chart.
November 10-14 clock stopped, no record
November 25- December 4 clock stopped, no record Possibly the clock was moving super slowly for some of it?
For Oct 9-10 there was no rain for flow to increase
Oct 11-15 clock was stuck and the pin did not move forward in time
External-MEF_DATA/Hydro/Streamflow/L0_daily/StripCharts/DailyBreakpoint/2018#create file path to call the data from Box
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"
#create a function to specify custom time intervals
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]
#read in the 2018 data
wy2018 <- read_excel(path = here(filepath, "WY2018.S2_Breakpoint.xlsx"),
skip =3)
#clean the 2018 data
wy2018_clean <- wy2018 %>%
rename(datetime = "Date/time",
stream_height_ft = "Stage.ft") %>%
select(c("datetime", "stream_height_ft")) %>%
clean_names() %>%
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"))
#assign October NA values
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-10-09', '2018-10-10'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-10-11', '2018-10-15'))] <- NA
#assign November NA values
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-11-10', '2018-11-14'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-11-25', '2018-12-04'))] <- NA
#assign December NA values
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-01', '2018-12-03'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-08', '2018-12-10'))] <- NA
wy2018_clean$stream_height_ft[wy2018_clean$date %between% as.Date(c('2018-12-11', '2018-12-31'))] <- NA
#test the 2018 data
if(nrow(wy2018) != nrow(wy2018_clean)) stop("Check clean dataframe dimensions")
#plot
p_2018_clean <- ggplot(data = wy2018_clean) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (ft)",
title = "WY 2018 S2 Bog Weir Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#note the weird oscillating values from Oct 31 - Nov 1
#static plot
#p_2018_clean
#interactive plot
ggplotly(p_2018_clean)
Next, we need to remove unusual stream height values from Oct 31 - Nov 1, 2018. During this time period, the stream height values oscillated rapidly, likely indicating human error in recording the data. Thus, we need to remove the erroneous values to smooth out the line. For more details see the next code chunk below.
wy2018_altered <- wy2018_clean
#for now, remove erroneous values < 0.10 during the time period of interest
wy2018_altered <- subset(wy2018_altered, !(wy2018_altered$date %between% as.Date(c('2018-10-31', '2018-11-01')) & wy2018_altered$stream_height_ft < 0.10))
#test the 2018 altered data - there should be fewer rows in the altered dataframe than the original/cleaned dataframe
if(nrow(wy2018_altered) == nrow(wy2018_clean)) stop("Check altered dataframe dimensions")
#plot
p_2018_altered <- ggplot(data = wy2018_altered) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (ft)",
title = "WY 2018 S2 Bog Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#save clean data CSV to use in future RMD files
write.csv(x = wy2018_altered,
file = file.path(here("intermediate_data", "wy2018_altered.csv")),
row.names = FALSE)
#note that the wonky values have been removed and the graphis now smoothed out
#static plot
#p_2018_altered
#interactive plot
ggplotly(p_2018_altered)
During this time, it is most likely that the dates were read/input incorrectly, resulting in an unnatural oscillation of stream flow values on Oct 31 and Nov 1 of 2018. Therefore, we removed values < 0.10 during this time period to smooth out the overall curve of stream height values.
#subset the time period of interest
subset_2018 <- wy2018_clean %>%
subset(date %between% as.Date(c('2018-10-30', '2018-11-02')))
#plot the data
p_subset <- ggplot(data = subset_2018) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
labs(title = "Wonky Oct 31 - Nov 1 Streamflow Values")
#static plot
p_subset
#interactive plot
#ggplotly(p_subset)
#remove unusual values below 0.10
subset_2018_altered <- wy2018_clean %>%
subset(date %between% as.Date(c('2018-10-30', '2018-11-02'))) %>%
subset(stream_height_ft > 0.10)
#plot the altered data
p_subset_altered <- ggplot(data = subset_2018_altered) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
labs(title = "Fixed Oct 31 - Nov 1 Streamflow Values")
#static plot
p_subset_altered
#interactive plot
#ggplotly(p_subset_altered)
External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2April 1-2, Line and time is off
April 8-17. Line doesn’t fluctuate though there should’ve been rain or if not rain, it should’ve fallen from how high the flow is
Proven in that the adjustment for 4/15 flow is great and S6 Weir chart response. Therefore, no record
April 17-22 discounted because pen level is off with no way to account for it
February: Clock errors, record compromised. Flow is below zero for month Clock restarted 2/21/2019. Measurements still below zero.
January: Clock errors meant that time could not be used to record accurate measurements. Measurements tend to be around 0.001 and chart line is there or below zero. Physical measurements indicate flow until at least 1/15/2019
March 14, Pen reverses and goes off the chart
March 18th, Pen placed back on chart, still reversed (fixed 4/23)
March 30-31 Pen stays in place even though there should be a response. Likely because it reached the bottom of the chart (pen was reversed)
September: Chart did not run from 9/3-9/10
External-MEF_DATA/Hydro/Streamflow/L0_daily/StripCharts/DailyBreakpoint/2019/digitized/S2-19/#create file path to call the data from Box
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"
#read in the 2019 data
wy2019 <- read_excel(path = here(filepath, "WY2019.S2_Breakpoint.xlsx"),
skip =3)
#clean the 2019 data
wy2019_clean <- wy2019 %>%
rename(datetime = "Date/time",
stream_height_ft = "Stage.ft") %>%
select(c("datetime", "stream_height_ft")) %>%
clean_names() %>%
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"))
#assign February NA values
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-01', '2019-04-02'))] <- NA
#assign March NA values
wy2019_clean$stream_height_ft[wy2019_clean$date == as.Date('2019-03-14')] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date == as.Date('2019-03-18')] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-03-30', '2019-03-31'))] <- NA
#assign April NA values
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-01', '2019-04-02'))] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-08', '2019-04-17'))] <- NA
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-04-17', '2019-04-22'))] <- NA
#assign September NA values
wy2019_clean$stream_height_ft[wy2019_clean$date %between% as.Date(c('2019-09-03', '2019-09-10'))] <- NA
#save clean data CSV to use in future RMD files
write.csv(x = wy2019_clean,
file = file.path(here("intermediate_data", "wy2019_clean.csv")),
row.names = FALSE)
#test the 2019 data
if(nrow(wy2019) != nrow(wy2019_clean)) stop("Check clean dataframe dimensions")
#plot
p_2019 <- ggplot(data = wy2019_clean) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (Ft)",
title = "WY 2019 S2 Bog Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#static plot
#p_2019
#interactive plot
ggplotly(p_2019)
External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2External-MEF_DATA/Hydro/Streamflow/L0_daily/StripCharts/DailyBreakpoint/2020/digitized/S2-20/wy2020_s2_gaps.txt#create file path to call the data from Box
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"
#read in the 2020 data
wy2020 <- read_excel(path = here(filepath, "WY2020.S2_Breakpoint.xlsx"),
skip =3)
#clean the 2020 data
wy2020_clean <- wy2020 %>%
rename(datetime = "Date/time",
stream_height_ft = "Stage.ft") %>%
select(c("datetime", "stream_height_ft")) %>%
clean_names() %>%
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"))
#create date range to fill in with NA values
start_date <- as.POSIXct("7/28/2020 13:40:00",
format = '%m/%d/%Y %H:%M:%S',
tz = "GMT")
end_date <- as.POSIXct("8/11/2020 00:00:00",
format = '%m/%d/%Y %H:%M:%S',
tz = "GMT")
#create a function to specify custom time intervals
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]
#assign February NA values
wy2020_clean$stream_height_ft[wy2020_clean$datetime %between% c(start_date, end_date)] <- NA
#save clean data CSV to use in future RMD files
write.csv(x = wy2020_clean,
file = file.path(here("intermediate_data", "wy2020_clean.csv")),
row.names = FALSE)
#test the 2020 data
if(nrow(wy2020) != nrow(wy2020_clean)) stop("Check clean dataframe dimensions")
#plot
p_2020 <- ggplot(data = wy2020_clean) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (Ft)",
title = "WY 2020 S2 Bog Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#static plot
#p_2020
#interactive plot
ggplotly(p_2020)
External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S22021-11-11 23:59:00 through 2021-12-29 00:00:00 (clock running fast on chart then clock died, well froze. Datalogger will have most of the data and is correct, except shaft encoder float froze at .14, see scanned chart for more remarks)
and also (beginning values off, should be .18 float frozen for several hours) 2021-03-04 00:00:00 through 2021-03-09 00:00:00 (need to correct for start of flow. See datalogger data)
External-MEF_DATA/Hydro/Streamflow/L0_daily/StripCharts/DailyBreakpoint/2021/digitized/S2-21/wy2021_s2_gaps.txt#create file path to call the data from Box
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/StripCharts/AnnualBreakPoint/S2"
#read in the 2021 data
wy2021 <- read_excel(path = here(filepath, "WY2021.S2_Breakpoint.xlsx"),
skip =3)
#clean the 2021 data
wy2021_clean <- wy2021 %>%
rename(datetime = "Date/time",
stream_height_ft = "Stage.ft") %>%
select(c("datetime", "stream_height_ft")) %>%
clean_names() %>%
mutate(date = format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S',
tz = "GMT"),
format = '%Y-%m-%d'),
date = as.POSIXct(date, tz = "GMT"))
#identify date intervals that need to be filled w/NAs
date1 <- as.POSIXct("2021-03-04 00:00:00",
format = '%Y-%m-%d %H:%M:%S',
tz = "GMT")
date2 <- as.POSIXct("2021-03-09 00:00:00",
format = '%Y-%m-%d %H:%M:%S',
tz = "GMT")
date3 <- as.POSIXct("2021-11-11 23:59:00",
format = '%Y-%m-%d %H:%M:%S',
tz = "GMT")
date4 <- as.POSIXct("2021-12-29 00:00:00",
format = '%Y-%m-%d %H:%M:%S',
tz = "GMT")
#create a function to specify custom time intervals
`%between%` <- function(x, interval) x >= interval[1] & x <= interval[2]
#assign March NA values
wy2021_clean$stream_height_ft[wy2021_clean$datetime %between% c(date1, date2)] <- NA
#assign Nov-Dec NA values
wy2021_clean$stream_height_ft[wy2021_clean$datetime %between% c(date3, date4)] <- NA
#save clean data CSV to use in future RMD files
write.csv(x = wy2021_clean,
file = file.path(here("intermediate_data", "wy2021_clean.csv")),
row.names = FALSE)
#test the 2021 data
if(nrow(wy2021) != nrow(wy2021_clean)) stop("Check clean dataframe dimensions")
#plot
p_2021 <- ggplot(data = wy2021_clean) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (Ft)",
title = "WY 2021 S2 Bog Stream Height") +
theme(plot.title = element_text(hjust = 0.5))
#static plot
#p_2021
#interactive plot
ggplotly(p_2021)
#combine 2017 - 2021 data
all_streamflow <- rbind(wy2017_clean,
wy2018_altered,
wy2019_clean,
wy2020_clean,
wy2021_clean)
#save clean data CSV to use in future RMD files
write.csv(x = all_streamflow,
file = file.path(here("intermediate_data", "all_streamflow.csv")),
row.names = FALSE)
#plot
p_all <- ggplot(data = all_streamflow) +
geom_line(aes(x = datetime, y = stream_height_ft)) +
theme_classic() +
labs(x = "Time",
y = "Stream Height (Ft)",
title = "S2 Bog Stream Height (WY 2017 - 2021)") +
theme(plot.title = element_text(hjust = 0.5))
#static plot
#p_all
#interactive plot
ggplotly(p_all)